START_TIME = Sys.time()
#
# Make maps of texas that show the geographic distribution of
# tracking, achievement, and population
# 

library(sf)
library(dplyr)
library(stringr)
library(reshape)

script.name = basename(sys.frame(1)$ofile)
scripts.dir <- dirname(sys.frame(1)$ofile)
print(scripts.dir)
setwd(scripts.dir)

setwd('..')

main.dir <- getwd()
out.dir = paste(main.dir, 'output', sep='/')

print(main.dir)
setwd(main.dir)

# shapefiles
dist_tbl = read_sf('data/tl_2018_48_unsd/tl_2018_48_unsd.shp')
cities_tbl = read_sf('data/USA_Major_Cities/USA_Major_Cities.shp')

# texas cities
cities.0 = cities_tbl[cities_tbl$ST == 'TX', ]
cities.1 = cities.0[order(cities.0$POPULATION, decreasing=T), ]
cities.2 = cities.1[c(1:4, 6), ]
cities.3 = st_transform(cities.2, crs='ESRI:102008')

# tracking measures
track.0 = read.csv('data/tracking_9-f5.csv')
track.0$elem.mid = ifelse(track.0$grade <= 5, 'elem', 'mid')

track.1 = track.0 %>%
  filter(subj == 'math') %>%
  group_by(elem.mid, distnum0) %>%
  summarize(trk_fa=sum(trk_fa_mnsch * schsz) / sum(schsz),
            trk_mr=sum(trk_mr_mnsch * schsz) / sum(schsz),
            .groups='keep')
track.1 = as.data.frame(track.1)
track.2 = as.data.frame(cast(track.1, distnum0 ~ elem.mid, value='trk_fa'))
rownames(track.2) = paste('TX', str_pad(track.2$distnum0, 6, pad = "0"), sep='-')

track.3 = track.0 %>%
  filter(subj == 'math', grade == 4) %>%
  group_by(distnum0) %>%
  summarize(zmn=sum(prev_score_mn * schsz) / sum(schsz),
            zsd=sum(prev_score_sd * schsz) / sum(schsz),
            .groups='keep')
track.3 = as.data.frame(track.3)
rownames(track.3) = paste('TX', str_pad(track.3$distnum0, 6, pad = "0"), sep='-')

# school districts
dist.alt.0 = read.csv('data/ArchivedSchoolAndDistrictFall2020.csv')
keeps = c('District.Number', 'NCES.District.ID', 'District.Enrollment.as.of.Oct.2019')
renames = c('distnum0', 'NCES.District.ID', 'nstud')
dist.0 = dist.alt.0[!duplicated(dist.alt.0$District.Number), keeps]
names(dist.0) = renames
dist.0$distnum0 = as.numeric(gsub("'", '', dist.0$distnum0))
dist.0$State.District.ID = paste('TX', str_pad(dist.0$distnum0, 6, pad = "0"), sep='-')
dist.0$NCES.District.ID = as.numeric(gsub("'", '', dist.0$NCES.District.ID))
dist.0 = dist.0[!is.na(dist.0$NCES.District.ID), ]

dist.0$trk.elem = track.2[dist.0$State.District.ID, 'elem']
dist.0$trk.mid = track.2[dist.0$State.District.ID, 'mid']
dist.0$zmn = track.3[dist.0$State.District.ID, 'zmn']
dist.0$zsd = track.3[dist.0$State.District.ID, 'zsd']

dist.0$tag = with(dist.0, paste('tag', NCES.District.ID, 'end', sep='.'))
dist_tbl$tag = with(dist_tbl, paste('tag', GEOID, 'end', sep='.'))
rownames(dist.0) = dist.0$tag
dist_tbl$distnum0 = dist.0[dist_tbl$tag, 'State.District.ID']
dist_tbl$trk.elem = dist.0[dist_tbl$tag, 'trk.elem']
dist_tbl$trk.mid = dist.0[dist_tbl$tag, 'trk.mid']
dist_tbl$zmn = dist.0[dist_tbl$tag, 'zmn']
dist_tbl$zsd = dist.0[dist_tbl$tag, 'zsd']
dist_tbl$nstud = as.numeric(dist.0[dist_tbl$tag, 'nstud'])
dist_tbl$stud.dens = dist_tbl$nstud * (10^6) / dist_tbl$ALAND

# transform to a nice projection
dist_proj = st_transform(dist_tbl, crs='ESRI:102008')
tx.0 = st_union(dist_proj)

dist_proj$trk.elem.cat = cut(dist_proj$trk.elem, breaks=c(0:6, 10) / 10)
dist_proj$trk.mid.cat = cut(dist_proj$trk.mid, breaks=c(0:6, 10) / 10)
dist_proj$zmn.cat = cut(dist_proj$zmn, breaks=c(-Inf, -0.25, -0.1, 0, 0.1, 0.25, Inf))
dist_proj$zsd.cat = cut(dist_proj$zsd, breaks=c(-Inf, 0.80, 0.83, 0.86, 0.89, 0.92, Inf))
dist_proj$stud.dens.cat = cut(dist_proj$stud.dens, breaks=c(0, 2, 5, 10, 25, 50, 1000))

dist.1 = as.data.frame(dist_proj)

specs.0 = data.frame(yname=c('trk.elem', 'trk.mid', 'zmn', 'zsd', 'stud.dens'),
                     title.str=c('A. Mean Absolute Tracking Measure\nBy School District (Grades 4-5)',
                                 'B. Mean Absolute Tracking Measure\nBy School District (Grades 6-8)',
                                 'Mean Math Z-Score (Grade 3)',
                                 'Average Cohort Math Z-Score SD (Grade 3)',
                                 'Density\nStudents Per sq km'),
                     fn=c('elem', 'mid', 'zmn', 'zsd', 'dens'))



for(j in 2){
  col.start = ifelse(j == 1, 'green', 'grey80')
  col.end = ifelse(j == 1, 'blue', 'grey20')
  for(i in 1:5){
    print(out.dir)
    setwd(out.dir)
    
    if(specs.0$fn[i] %in% c('elem', 'mid')){
      fn0 = paste('tracking_10-f5-map-', specs.0$fn[i], j, '.png', sep='')
    }else{
      fn0 = paste('tracking_10-fc6-map-', specs.0$fn[i], j, '.png', sep='')
    }
    
    png(fn0, width=1500, heigh=1500)
    par(mar=c(1, 1, 8, 1))
    
    windowsFonts(A = windowsFont("Calibri (Body)"))
    par(family='A')
    
    plot(tx.0,
         lwd=1, border='black', col=NA, bg='white',
         font.main=1,
         reset=F, cex.main=4,
         main=specs.0$title.str[i])
    
    x.lo = -1000000
    x.hi = 500000
    y.lo = -1600000
    y.hi = -400000
    
    yn = specs.0$yname[i]
    yn2 = paste(yn, 'cat', sep='.')
    
    col.3 = colorRampPalette(c(col.start, col.end))(7)
    col.2 = col.3[dist.1[, yn2]]
    col.2[is.na(col.2)] = '#00000000'
    
    plot(dist_proj[, yn],
         lwd=1, border='white', col=col.2, add=T)
    
    # redraw borders
    plot(tx.0,
         lwd=1, border='black', col=NA, add=T)
    
    city.col = 'black'
    plot(cities.3[, 'ST'], pch=19, col=city.col, cex=4, add=T)
    
    sz = 4
    # Manually label and point to cities
    text(x.lo + (0 * (x.hi - x.lo)), y.lo + (0.5 * (y.hi - y.lo)),
         labels='El Paso', pos=1, cex=sz)
    text(x.lo + (0.65 * (x.hi - x.lo)), y.lo + (0.75 * (y.hi - y.lo)),
         labels='Dallas', pos=3, cex=sz)
    text(x.lo + (0.8 * (x.hi - x.lo)), y.lo + (0.25 * (y.hi - y.lo)),
         labels='Houston', pos=1, cex=sz)
    text(x.lo + (0.7 * (x.hi - x.lo)), y.lo + (0.15 * (y.hi - y.lo)),
         labels='Austin', pos=1, cex=sz)
    text(x.lo + (0.3 * (x.hi - x.lo)), y.lo + (0.16 * (y.hi - y.lo)),
         labels='San Antonio', pos=1, cex=4)
    
    segments(x.lo + (0 * (x.hi - x.lo)), y.lo + (0.5 * (y.hi - y.lo)),
             -949000, -911000, lwd=2)
    segments(x.lo + (0.65 * (x.hi - x.lo)), y.lo + (0.75 * (y.hi - y.lo)),
             -71069.87, -846482.6, lwd=2)
    segments(x.lo + (0.8 * (x.hi - x.lo)), y.lo + (0.25 * (y.hi - y.lo)),
             58822.32, -1196353, lwd=2)
    segments(x.lo + (0.7 * (x.hi - x.lo)), y.lo + (0.15 * (y.hi - y.lo)),
             -160884.1, -1136440, lwd=2)
    segments(x.lo + (0.3 * (x.hi - x.lo)), y.lo + (0.16 * (y.hi - y.lo)),
             -232794.7, -1232111, lwd=2)
    
    nl = length(levels(dist.1[, yn2]))
    legend('topleft', legend=c(levels(dist.1[, yn2]), 'No Data'),
           fill=c(col.3[1:nl], 'white'), border=c(rep('white', nl), 'black'), box.col=c('white'), cex=sz)
    
    dev.off()
  }
}


par(mar=c(1, 1, 1.2, 1))


END_TIME = Sys.time()
print(paste('Time elapsed in ', script.name, ':', sep=''))
print(END_TIME - START_TIME)




